In [2]:
import pandas as pd
import ga4gh.client
# Show which ga4gh package version produced the outputs in this notebook.
print(ga4gh.__version__)

# HTTP client used by every search call in the cells below.
gc = ga4gh.client.HttpClient("http://localhost:8000")

# Keyword arguments shared by the region-restricted searches
# (searchVariants / searchVariantAnnotations) further down.
region_constraints = {
    "referenceName": "1",
    "start": 0,
    "end": int(1e10),
}
In [3]:
# One row per dataset returned by the server; the columns are whatever
# toJsonDict() yields for each dataset (presumably a flat dict -- confirm
# against the ga4gh client if the frame looks odd).
data_sets = pd.DataFrame([
    dataset.toJsonDict()
    for dataset in gc.searchDatasets()
])
data_sets.head()
Out[3]:
In [4]:
# One row per (dataset, variant set) pair: walk every dataset and list the
# variant sets the server reports for it.
variant_sets = pd.DataFrame([
    dict(
        data_set_id=dataset.id,
        variant_set_id=variant_set.id,
        variant_set_name=variant_set.name,
    )
    for dataset in gc.searchDatasets()
    for variant_set in gc.searchVariantSets(dataset.id)
])
variant_sets.head()
Out[4]:
In [5]:
# One row per (dataset, variant set, call set) triple: for every variant set
# of every dataset, list the call sets the server reports for it.
call_sets = pd.DataFrame([
    dict(
        data_set_id=dataset.id,
        variant_set_id=variant_set.id,
        variant_set_name=variant_set.name,
        call_set_id=call_set.id,
        call_set_name=call_set.name,
    )
    for dataset in gc.searchDatasets()
    for variant_set in gc.searchVariantSets(dataset.id)
    for call_set in gc.searchCallSets(variant_set.id)
])
call_sets.head()
Out[5]:
In [6]:
# One row per (dataset, variant set, variant annotation set) triple.
# Stored under its own name so the call-set table built in the previous
# cell is not overwritten by a frame that holds a different kind of record.
variant_annotation_sets = pd.DataFrame([
    {
        'data_set_id': ds.id,
        'variant_set_id': vs.id,
        'variant_set_name': vs.name,
        'variant_annotation_set_id': vas.id,
        'variant_annotation_set_name': vas.name,
    }
    for ds in gc.searchDatasets()
    for vs in gc.searchVariantSets(ds.id)
    for vas in gc.searchVariantAnnotationSets(vs.id)
])
variant_annotation_sets.head()
Out[6]:
In [7]:
# Per-variant-set summary: one row per (dataset, variant set) with counts of
# the call sets, variants, annotation sets and annotations found for it.
# Variant and annotation counts only cover the region in `region_constraints`.
# Each count drains the search iterator with sum(1 for _ in ...), so every row
# costs several server round-trips.
variant_set_summary = pd.DataFrame([
    {
        'data_set_id': ds.id,
        'variant_set_id': vs.id,
        'variant_set_name': vs.name,
        'n_callsets': sum(1 for _ in gc.searchCallSets(vs.id)),
        'n_variants': sum(1 for _ in gc.searchVariants(vs.id, **region_constraints)),
        'n_annotation_sets': sum(1 for _ in gc.searchVariantAnnotationSets(vs.id)),
        # Total annotations across all annotation sets of this variant set.
        'n_annotations': sum(
            1
            for vas in gc.searchVariantAnnotationSets(vs.id)
            for _ in gc.searchVariantAnnotations(vas.id, **region_constraints)
        ),
    }
    # Iterate variant sets only: annotation sets are counted inside the row,
    # so a variant set without annotation sets still gets one row (with zero
    # counts) and no row is repeated once per annotation set.
    for ds in gc.searchDatasets()
    for vs in gc.searchVariantSets(ds.id)
])
variant_set_summary.head()
Out[7]:
In [ ]:
In [ ]: